Land Cover Classification in Southern Santa Barbara County

Author : Fletcher McConnell

Repository: https://github.com/fletcher-m/land-cover-classification

About

In this repository, I create a land classification map of Southern Santa Barbara county, from data collected from landsat5 satellite and filtered through a decision tree.

Highlights

  • Raster manipulation with terra

  • Geospatial data wrangling with sf

  • Making a decision tree with rpart

  • Visualizing land cover with Tmap

About the Data

In this R markdown document, I use data from the landsat5 satellite https://www.usgs.gov/landsat-missions/landsat-5. This data contains one scene from September 25, 2017. It contains bands 1, 2, 3, 4, 5, 7 (green, blue, red, NIR, SWIR1, SWIR2). The other data used are shapefiles with one containing a polygon representing Southern Santa Barbara County (“SB_county_south_shp”) and the other represents polygons of training sites (“trainingdata.shp”).

Final Output

Import Libraries

library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(terra)
## terra 1.7.55
library(here)
## here() starts at /Users/fletchermcconnell/Documents/EDS 223/land_cover_classification
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:terra':
## 
##     intersect, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(rpart)
library(rpart.plot)
library(tmap)
## 
## Attaching package: 'tmap'
## The following object is masked from 'package:datasets':
## 
##     rivers
rm(list = ls())

Load Landsat Data and Update Layer Names

# list files for each band, including the full file path
filelist <- list.files("/Users/fletchermcconnell/Documents/EDS 223/land_cover_classification/landsat-data", full.names = TRUE)

# read in and store as a raster stack
landsat_20070925 <- rast(filelist)

# update layer names to match band
names(landsat_20070925) <- c("blue", "green", "red", "NIR", "SWIR1", "SWIR2")

# plot true color image
plotRGB(landsat_20070925, r = 3, g = 2, b = 1, stretch = "lin")

Load Study Area for Southern Santa Barbara County

# read in shapefile for southern portion of SB county
SB_county_south <- st_read("/Users/fletchermcconnell/Documents/EDS 223/land_cover_classification/data (4)/SB_county_south.shp")

# project to match the Landsat data
SB_county_south <- st_transform(SB_county_south, crs = crs(landsat_20070925))

Crop and Mask Landsat Data to Study Area

# crop Landsat scene to the extent of the SB county shapefile
landsat_cropped <- crop(landsat_20070925, SB_county_south)

# mask the raster to southern portion of SB county
landsat_masked <- mask(landsat_cropped, SB_county_south)

# remove unnecessary object from environment
rm(landsat_20070925, SB_county_south, landsat_cropped)

Convert Landsat Values to Reflectance

# reclassify erroneous values as NA
rcl <- matrix(c(-Inf, 7273, NA,
                 43636, Inf, NA), ncol = 3, byrow = TRUE)

landsat <- classify(landsat_masked, rcl = rcl)

# adjust values based on scaling factor
landsat <- (landsat * 0.0000275 - 0.2) * 100

# plot true color image to check results
plotRGB(landsat, r = 3, g = 2, b = 1, stretch = "lin")

# check values are 0 - 100
summary(landsat)
##       blue           green            red             NIR       
##  Min.   : 1.11   Min.   : 0.74   Min.   : 0.00   Min.   : 0.23  
##  1st Qu.: 2.49   1st Qu.: 2.17   1st Qu.: 1.08   1st Qu.: 0.75  
##  Median : 3.06   Median : 4.59   Median : 4.45   Median :14.39  
##  Mean   : 3.83   Mean   : 5.02   Mean   : 4.92   Mean   :11.52  
##  3rd Qu.: 4.63   3rd Qu.: 6.76   3rd Qu.: 7.40   3rd Qu.:19.34  
##  Max.   :39.42   Max.   :53.32   Max.   :56.68   Max.   :57.08  
##  NA's   :39856   NA's   :39855   NA's   :39855   NA's   :39856  
##      SWIR1           SWIR2      
##  Min.   : 0.10   Min.   : 0.20  
##  1st Qu.: 0.41   1st Qu.: 0.60  
##  Median :13.43   Median : 8.15  
##  Mean   :11.88   Mean   : 8.52  
##  3rd Qu.:18.70   3rd Qu.:13.07  
##  Max.   :49.13   Max.   :48.07  
##  NA's   :42892   NA's   :46809

Extract Reflectance Values for Training Data

# read in and transform training data
training_data <- st_read("/Users/fletchermcconnell/Documents/EDS 223/land_cover_classification/data (4)/trainingdata.shp") %>%
  st_transform(., crs = crs(landsat))

# extract reflectance values at training sites
training_data_values <- extract(landsat, training_data, df = TRUE)

# convert training data to data frame
training_data_attributes <- training_data %>%
  st_drop_geometry()

# join training data attributes and extracted reflectance values
SB_training_data <- left_join(training_data_values, training_data_attributes,
                              by = c("ID" = "id")) %>%
  mutate(type = as.factor(type)) # convert landcover type to factor

Train the Decision Tree

# establish model formula
SB_formula <- type ~ red + green + blue + NIR + SWIR1 + SWIR2

# train decision tree
SB_decision_tree <- rpart(formula = SB_formula,
                          data = SB_training_data,
                          method = "class",
                          na.action = na.omit)

# plot decision tree
prp(SB_decision_tree)

Apply Decision Tree to Southern Santa Barbara

# classify image based on decision tree
SB_classification <- predict(landsat, SB_decision_tree, type = "class", na.rm = TRUE)

# inspect level to understand the order of classes in prediction
levels(SB_training_data$type)
## [1] "green_vegetation" "soil_dead_grass"  "urban"            "water"

Make a Plot of Landcover Classification in SB

# plot results

tm_shape(SB_classification) +
  tm_raster(col.scale = tm_scale_categorical(values = c("#8DB580", "#F2DDA4", "#7E8987", "#6A8EAE")),
            col.legend = tm_legend(labels = c("green vegetation", "soil/dead grass", "urban", "water"),
                                   title = "Landcover type")) +
  tm_layout(legend.position = c("left", "bottom"))
## SpatRaster object downsampled to 621 by 1612 cells.